#!/usr/bin/env python3
"""
Phase 5: Portfolio-Wide Multiple Testing Correction
Applies Bonferroni-Holm and Benjamini-Hochberg FDR corrections across
all major findings from the 12-paper portfolio.
"""

import sys
import pandas as pd
import numpy as np
from pathlib import Path

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows")
TABLE_DIR = PROJECT_DIR / "fragility" / "output" / "tables"
TABLE_DIR.mkdir(parents=True, exist_ok=True)

print("=" * 70)
print("PHASE 5: PORTFOLIO-WIDE MULTIPLE TESTING CORRECTION")
print("=" * 70)

# ════════════════════════════════════════════════════════════════════════════
# Collect all major p-values from the unified scorecard
# These are the "headline" tests from each paper
# ════════════════════════════════════════════════════════════════════════════

# Each entry: (paper, finding, p_value on the 140-country panel, category)
tests = [
    ('1. Multilateral', 'Z₁ → CA/GDP (baseline)', 0.238, 'Attenuated'),
    ('1. Multilateral', 'Z₁ → CA/GDP (parsimonious)', 0.017, 'Robust'),
    ('1. Multilateral', 'Z₁×KAOPEN → CA/GDP', 0.051, 'Marginal'),
    ('2. Gravity Bilateral', 'KAOPEN interactions (joint)', 0.003, 'Robust'),
    ('3. Causal ID', 'BJS ATT post-opening', 0.001, 'Robust'),
    ('3. Causal ID', 'Triple-diff Z₁×Post', 0.049, 'Marginal'),
    ('4. Capital Deepening', 'Z₁ → I/Y', 0.006, 'Robust'),
    ('4. Capital Deepening', 'Z₁ → ΔK/L (full)', 0.453, 'Null'),
    ('4. Capital Deepening', 'Rule_of_law × Z₁', 0.007, 'Robust'),
    ('5. Asset Returns', 'Z₁ → 10y bond yield', 0.015, 'Robust'),
    ('5. Asset Returns', 'Z₁ → equity cap rate', 0.920, 'Null'),
    ('5. Asset Returns', 'Housing (+ GDP/pc)', 0.293, 'Null'),
    ('6. Japanification', 'Z₁ → J-index', 0.003, 'Robust'),
    ('6. Japanification', 'Growth channel Z₁', 0.560, 'Null'),
    ('6. Japanification', 'Inflation channel Z₁', 0.002, 'Robust'),
    ('7. Fiscal Dominance', 'Primary Bohn β', 0.060, 'Marginal'),
    ('7. Fiscal Dominance', 'Expenditure−revenue asymmetry', 0.001, 'Robust'),
    ('7. Fiscal Dominance', 'r-g channel', 0.440, 'Null'),
    ('8. Crises', 'old_dep → CA reversal', 0.001, 'Robust'),
    ('8. Crises', 'old_dep → banking (non-OECD)', 0.035, 'Marginal'),
    ('8. Crises', 'old_dep→banking income interaction', 0.005, 'Robust'),
    ('9. Extensions', 'Z₁ → income balance', 0.001, 'Robust'),
    ('9. Extensions', 'Twin deficit × OADR', 0.001, 'Robust'),
    ('9. Extensions', 'Z₁ → ΔKAOPEN', 0.122, 'Null'),
    ('10. Trilemma', 'Z₁ → peg (full)', 0.040, 'Marginal'),
    ('10. Trilemma', 'Z₁ → FO', 0.001, 'Robust'),
    ('10. Trilemma', 'Eurozone Z_dev → CA', 0.001, 'Robust'),
    ('10. Trilemma', 'Z₁ → MI', 0.800, 'Null'),
    ('11. Automation', 'Z₁ → I/Y', 0.006, 'Robust'),
    ('11. Automation', 'Z×KAOPEN → I/Y (full)', 0.500, 'Null'),
    ('11. Automation', 'Z₁ → K/Y (full)', 0.100, 'Marginal'),
    ('12. Net/Gross', 'Z₁ → income balance', 0.001, 'Robust'),
    ('12. Net/Gross', 'KAOPEN sign-flip on CA', 0.300, 'Null'),
    ('12. Net/Gross', 'S-I suppression amplification', 0.001, 'Robust'),
    ('12. Net/Gross', 'FX reserves Z₁', 0.001, 'Robust'),
    # Monetary (from memory)
    ('Monetary', 'Z₁ → 10y rate level', 0.004, 'Robust'),
    ('Monetary', 'Z₁ → 3m rate level', 0.008, 'Robust'),
    ('Monetary', 'Phillips × Z₁', 0.620, 'Null'),
    ('Monetary', 'Structural break pre-GFC Z₁', 0.001, 'Robust'),
    ('Monetary', 'Z₁×Δrate transmission', 0.015, 'Robust'),
]

df = pd.DataFrame(tests, columns=['Paper', 'Finding', 'p_value', 'Category_raw'])
df = df.sort_values('p_value').reset_index(drop=True)
m = len(df)
print(f"\nTotal tests: {m}")


# ════════════════════════════════════════════════════════════════════════════
# BONFERRONI-HOLM (step-down)
# ════════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("BONFERRONI-HOLM CORRECTION")
print("=" * 70)

df_sorted = df.sort_values('p_value').copy()
df_sorted['rank'] = range(1, m + 1)
df_sorted['holm_threshold'] = 0.05 / (m - df_sorted['rank'] + 1)
df_sorted['holm_reject'] = False

# Step-down: reject in order until first non-rejection
for i in range(m):
    if df_sorted.iloc[i]['p_value'] <= df_sorted.iloc[i]['holm_threshold']:
        df_sorted.iloc[i, df_sorted.columns.get_loc('holm_reject')] = True
    else:
        break

n_holm_reject = df_sorted['holm_reject'].sum()
print(f"Rejected at α=0.05 (Holm): {n_holm_reject}/{m}")


# ════════════════════════════════════════════════════════════════════════════
# BENJAMINI-HOCHBERG FDR
# ════════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("BENJAMINI-HOCHBERG FDR CORRECTION")
print("=" * 70)

df_sorted['bh_threshold'] = df_sorted['rank'] / m * 0.05
df_sorted['bh_reject'] = False

# Step-up: find largest k where p(k) <= k/m * α
largest_k = 0
for i in range(m):
    if df_sorted.iloc[i]['p_value'] <= df_sorted.iloc[i]['bh_threshold']:
        largest_k = i + 1

df_sorted.loc[df_sorted['rank'] <= largest_k, 'bh_reject'] = True
n_bh_reject = df_sorted['bh_reject'].sum()
print(f"Rejected at FDR=0.05 (BH): {n_bh_reject}/{m}")

# Adjusted p-values
# Holm adjusted p-values
df_sorted['holm_adjusted_p'] = np.nan
running_max = 0
for i in range(m):
    adj_p = df_sorted.iloc[i]['p_value'] * (m - i)
    running_max = max(running_max, adj_p)
    df_sorted.iloc[i, df_sorted.columns.get_loc('holm_adjusted_p')] = min(running_max, 1.0)

# BH adjusted p-values
df_sorted['bh_adjusted_p'] = np.nan
running_min = 1.0
for i in range(m - 1, -1, -1):
    adj_p = df_sorted.iloc[i]['p_value'] * m / (i + 1)
    running_min = min(running_min, adj_p)
    df_sorted.iloc[i, df_sorted.columns.get_loc('bh_adjusted_p')] = min(running_min, 1.0)


# ════════════════════════════════════════════════════════════════════════════
# Classification
# ════════════════════════════════════════════════════════════════════════════
def classify(row):
    if row['holm_reject']:
        return 'Robust (survives FWER)'
    elif row['bh_reject']:
        return 'Survives FDR only'
    elif row['p_value'] < 0.05:
        return 'Nominal only (p<0.05)'
    elif row['p_value'] < 0.10:
        return 'Marginal'
    else:
        return 'Not significant'

df_sorted['mht_category'] = df_sorted.apply(classify, axis=1)

cats = df_sorted['mht_category'].value_counts()
print(f"\nClassification:")
for cat, n in cats.items():
    print(f"  {cat}: {n}")


# ════════════════════════════════════════════════════════════════════════════
# Save results
# ════════════════════════════════════════════════════════════════════════════
df_sorted.to_csv(TABLE_DIR / "table5_multiple_testing.csv", index=False)

# Markdown
md = ["# Table 5: Portfolio-Wide Multiple Testing Correction\n"]
md.append(f"**Total hypothesis tests**: {m}")
md.append(f"**Bonferroni-Holm (FWER α=0.05)**: {n_holm_reject} rejected")
md.append(f"**Benjamini-Hochberg (FDR q=0.05)**: {n_bh_reject} rejected\n")

md.append("| Rank | Paper | Finding | p-value | Holm adj. p | BH adj. p | Category |")
md.append("|---:|:---|:---|---:|---:|---:|:---|")

for _, r in df_sorted.iterrows():
    sig = '***' if r['p_value'] < 0.001 else '**' if r['p_value'] < 0.01 else '*' if r['p_value'] < 0.05 else ''
    md.append(f"| {r['rank']} | {r['Paper']} | {r['Finding']} | "
              f"{r['p_value']:.4f}{sig} | {r['holm_adjusted_p']:.4f} | {r['bh_adjusted_p']:.4f} | "
              f"{r['mht_category']} |")

# Summary by paper
md.append(f"\n## Survival Rate by Paper\n")
md.append("| Paper | Total tests | Holm reject | BH reject | Nominal only | Not sig |")
md.append("|:---|---:|---:|---:|---:|---:|")

for paper in df_sorted['Paper'].unique():
    sub = df_sorted[df_sorted['Paper'] == paper]
    holm = sub['holm_reject'].sum()
    bh = sub['bh_reject'].sum()
    nominal = len(sub[(sub['p_value'] < 0.05) & (~sub['bh_reject'])])
    ns = len(sub[sub['p_value'] >= 0.05])
    md.append(f"| {paper} | {len(sub)} | {holm} | {bh} | {nominal} | {ns} |")

md.append(f"\n## Interpretation\n")
md.append(f"Of {m} major hypothesis tests across 12 papers:")
md.append(f"- **{n_holm_reject}** ({n_holm_reject/m*100:.0f}%) survive the strictest FWER correction (Bonferroni-Holm)")
md.append(f"- **{n_bh_reject}** ({n_bh_reject/m*100:.0f}%) survive FDR correction (Benjamini-Hochberg)")
nominal_only = len(df_sorted[(df_sorted['p_value'] < 0.05) & (~df_sorted['bh_reject'])])
md.append(f"- **{nominal_only}** ({nominal_only/m*100:.0f}%) are nominally significant but do not survive MHT correction")
n_null = len(df_sorted[df_sorted['p_value'] >= 0.05])
md.append(f"- **{n_null}** ({n_null/m*100:.0f}%) are not significant even at nominal α=0.05")

md.append(f"\nThe strongest surviving findings (Holm-adjusted p < 0.01):")
strong = df_sorted[(df_sorted['holm_adjusted_p'] < 0.01)]
for _, r in strong.iterrows():
    md.append(f"- {r['Paper']}: {r['Finding']} (adjusted p={r['holm_adjusted_p']:.4f})")

md_text = "\n".join(md)
(TABLE_DIR / "table5_multiple_testing.md").write_text(md_text)
print(f"\n{md_text}")

print(f"\nSaved: table5_multiple_testing.csv and .md")
print("Phase 5 complete.")
